# Replication file for "Immigration and the Sociocultural Divide in Central and Eastern Europe: Stasis or Evolution?" by Caroline Marie Lancaster. Please contact cml93@live.unc.edu with any questions. 

# This file creates Figure 1 in the main text and Figures 1-3 in Appendix C using Eurobarometer data.

# 28 June 2021

library(readstata13)
library(survey)
library(ggplot2)
library(dplyr)

####### Figure 1, Appendix C
#read in and clean 2014
eb2014 <- read.dta13('nov2014eb.dta')
eb2014 <- eb2014[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2014) <- c('isocntry', 'im', 'w23')

eb2014 <- subset(eb2014, isocntry == 'BG' | isocntry == 'HR' | isocntry == 'CZ' | isocntry == 'EE' | isocntry == 'HU' | isocntry == 'LT' | isocntry == 'PL' | isocntry == 'RO' | isocntry == 'SI' | isocntry == 'SK')

eb2014$im <- as.numeric(eb2014$im)

eb14w <- svydesign(ids = ~1, data = eb2014, weights = eb2014$w23)
tab1 <- svytable(~isocntry + im, design = eb14w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb14w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('BG', 'CZ', 'EE', 'HR', 'HU', 'LT', 'PL', 'RO', 'SI', 'SK')
dat14 <- merge(tot1, tab1, by = "isocntry")
dat14$per <- dat14$Freq/dat14$tot1 * 100
dat14 <- dat14[,-(2:4)]
dat14$year <- 2014

#read in and clean 2015
eb2015 <- read.dta13('nov2015eb.dta')
eb2015 <- eb2015[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2015) <- c('isocntry', 'im', 'w23')

eb2015 <- subset(eb2015, isocntry == 'BG' | isocntry == 'HR' | isocntry == 'CZ' | isocntry == 'EE' | isocntry == 'HU' | isocntry == 'LT' | isocntry == 'PL' | isocntry == 'RO' | isocntry == 'SI' | isocntry == 'SK')

eb2015$im <- as.numeric(eb2015$im)

eb15w <- svydesign(ids = ~1, data = eb2015, weights = eb2015$w23)
tab1 <- svytable(~isocntry + im, design = eb14w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb15w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('BG', 'CZ', 'EE', 'HR', 'HU', 'LT', 'PL', 'RO', 'SI', 'SK')
dat15 <- merge(tot1, tab1, by = "isocntry")
dat15$per <- dat15$Freq/dat15$tot1 * 100
dat15 <- dat15[,-(2:4)]
dat15$year <- 2015

#read in and clean 2016
eb2016 <- read.dta13('may2016eb.dta')
eb2016 <- eb2016[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2016) <- c('isocntry', 'im', 'w23')

eb2016 <- subset(eb2016, isocntry == 'BG' | isocntry == 'HR' | isocntry == 'CZ' | isocntry == 'EE' | isocntry == 'HU' | isocntry == 'LT' | isocntry == 'PL' | isocntry == 'RO' | isocntry == 'SI' | isocntry == 'SK')

eb2016$im <- as.numeric(eb2016$im)

eb16w <- svydesign(ids = ~1, data = eb2016, weights = eb2016$w23)
tab1 <- svytable(~isocntry + im, design = eb16w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb16w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('BG', 'CZ', 'EE', 'HR', 'HU', 'LT', 'PL', 'RO', 'SI', 'SK')
dat16 <- merge(tot1, tab1, by = "isocntry")
dat16$per <- dat16$Freq/dat16$tot1 * 100
dat16 <- dat16[,-(2:4)]
dat16$year <- 2016

#read in and clean 2017
eb2017 <- read.dta13('may2017eb.dta')
eb2017 <- eb2017[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2017) <- c('isocntry', 'im', 'w23')

eb2017 <- subset(eb2017, isocntry == 'BG' | isocntry == 'HR' | isocntry == 'CZ' | isocntry == 'EE' | isocntry == 'HU' | isocntry == 'LT' | isocntry == 'PL' | isocntry == 'RO' | isocntry == 'SI' | isocntry == 'SK')

eb2017$im <- as.numeric(eb2017$im)
eb17w <- svydesign(ids = ~1, data = eb2017, weights = eb2017$w23)
tab1 <- svytable(~isocntry + im, design = eb17w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb17w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('BG', 'CZ', 'EE', 'HR', 'HU', 'LT', 'PL', 'RO', 'SI', 'SK')
dat17 <- merge(tot1, tab1, by = "isocntry")
dat17$per <- dat17$Freq/dat17$tot1 * 100
dat17 <- dat17[,-(2:4)]
dat17$year <- 2017

#read in and clean 2019
eb2019<- read.dta13('july2019eb.dta')
eb2019 <- eb2019[c('isocntry', 'qa3a_9', 'w23')]
colnames(eb2019) <- c('isocntry', 'im', 'w23')

eb2019 <- subset(eb2019, isocntry == 'BG' | isocntry == 'HR' | isocntry == 'CZ' | isocntry == 'EE' | isocntry == 'HU' | isocntry == 'LT' | isocntry == 'PL' | isocntry == 'RO' | isocntry == 'SI' | isocntry == 'SK')

eb2019$im <- as.numeric(eb2019$im)
eb19w <- svydesign(ids = ~1, data = eb2019, weights = eb2019$w23)
tab1 <- svytable(~isocntry + im, design = eb19w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb19w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('BG', 'CZ', 'EE', 'HR', 'HU', 'LT', 'PL', 'RO', 'SI', 'SK')
dat19 <- merge(tot1, tab1, by = "isocntry")
dat19$per <- dat19$Freq/dat19$tot1 * 100
dat19 <- dat19[,-(2:4)]
dat19$year <- 2019

#combine weighted frequency data and plot

dat <- rbind(dat14, dat15, dat16, dat17, dat19)
dat$year <- as.factor(dat$year)

ebplot <- ggplot(dat, aes(fill=year, y=per, x=isocntry)) + 
  geom_bar(position="dodge", stat="identity") + scale_fill_manual(name="Year", labels=c("2014",'2015',"2016", '2017', '2019'), values = c("grey5","grey20", "grey50", 'grey65', 'grey85')) + labs(x = "Country", y = "Percentage") +theme_bw() + scale_x_discrete(labels=c("Bul","Cze", "Est", "Cro", "Hun", "Lit", "Pol","Rom", "Slk", 'Slo'))

###### Figure 1, main text 
#group weighted country data by year and plot
permean <- dat %>% group_by(year) %>% summarize(perc =mean(per))
permean$year <- as.factor(permean$year)

ebplot <- ggplot(permean, aes(fill=year, y=perc, x=year)) + 
  geom_bar(position="dodge", stat="identity") + scale_fill_manual(name="Year", labels=c("2014",'2015',"2016", '2017', '2019'), values = c("grey5","grey20", "grey50", 'grey65', 'grey85')) + labs(x = "Year", y = "Percentage") +theme_bw()

###### now, do the same for Western Europe
###### Figure 3, Appendix C
#read in and clean 2014
eb2014 <- read.dta13('nov2014eb.dta')
eb2014 <- eb2014[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2014) <- c('isocntry', 'im', 'w23')

eb2014 <- subset(eb2014, isocntry == 'AT' | isocntry == 'BE' | isocntry == 'DE-W' | isocntry == 'DE-E'| isocntry == 'DK' | isocntry == 'FI' | isocntry == 'FR' | isocntry == 'GB-GBN' | isocntry == 'GB-NIR'| isocntry == 'GR' | isocntry == 'IT' | isocntry == 'NL' | isocntry == 'SE'| isocntry == 'ES')

eb2014$im <- as.numeric(eb2014$im)

eb14w <- svydesign(ids = ~1, data = eb2014, weights = eb2014$w23)
tab1 <- svytable(~isocntry + im, design = eb14w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb14w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('AT', 'BE', 'DE-E','DE-W', 'DK','ES', 'FI', 'FR', 'GB-GBN', 'GB-NIR','GR', 'IT', 'NL', 'SE')
dat14 <- merge(tot1, tab1, by = "isocntry")
dat14$per <- dat14$Freq/dat14$tot1 * 100
dat14 <- dat14[,-(2:4)]
#creating DE and GB by combining E/W and GB/NI
dat14[15,2] <- (dat14[3,2] + dat14[4,2]) / 2
dat14[16,2] <- (dat14[9,2] + dat14[10,2]) / 2
dat14[15, 1] <- 'DE'
dat14[16, 1] <- 'GB'
dat14 <- dat14[-c(3,4,9,10), ]
dat14$year <- 2014

#read in and clean 2015
eb2015 <- read.dta13('nov2015eb.dta')
eb2015 <- eb2015[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2015) <- c('isocntry', 'im', 'w23')

eb2015 <- subset(eb2015, isocntry == 'AT' | isocntry == 'BE' | isocntry == 'DE-W' | isocntry == 'DE-E'| isocntry == 'DK' | isocntry == 'FI' | isocntry == 'FR' | isocntry == 'GB-GBN' | isocntry == 'GB-NIR'| isocntry == 'GR' | isocntry == 'IT' | isocntry == 'NL' | isocntry == 'SE'| isocntry == 'ES')

eb2015$im <- as.numeric(eb2015$im)

eb15w <- svydesign(ids = ~1, data = eb2015, weights = eb2015$w23)
tab1 <- svytable(~isocntry + im, design = eb14w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb15w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('AT', 'BE', 'DE-E','DE-W', 'DK','ES', 'FI', 'FR', 'GB-GBN', 'GB-NIR','GR', 'IT', 'NL', 'SE')
dat15 <- merge(tot1, tab1, by = "isocntry")
dat15$per <- dat15$Freq/dat15$tot1 * 100
dat15 <- dat15[,-(2:4)]
dat15[15,2] <- (dat15[3,2] + dat15[4,2]) / 2
dat15[16,2] <- (dat15[9,2] + dat15[10,2]) / 2
dat15[15, 1] <- 'DE'
dat15[16, 1] <- 'GB'
dat15 <- dat15[-c(3,4,9,10), ]
dat15$year <- 2015

#read in and clean 2016
eb2016 <- read.dta13('may2016eb.dta')
eb2016 <- eb2016[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2016) <- c('isocntry', 'im', 'w23')

eb2016 <- subset(eb2016, isocntry == 'AT' | isocntry == 'BE' | isocntry == 'DE-W' | isocntry == 'DE-E'| isocntry == 'DK' | isocntry == 'FI' | isocntry == 'FR' | isocntry == 'GB-GBN' | isocntry == 'GB-NIR'| isocntry == 'GR' | isocntry == 'IT' | isocntry == 'NL' | isocntry == 'SE'| isocntry == 'ES')

eb2016$im <- as.numeric(eb2016$im)

eb16w <- svydesign(ids = ~1, data = eb2016, weights = eb2016$w23)
tab1 <- svytable(~isocntry + im, design = eb16w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb16w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('AT', 'BE', 'DE-E','DE-W', 'DK','ES', 'FI', 'FR', 'GB-GBN', 'GB-NIR','GR', 'IT', 'NL', 'SE')
dat16 <- merge(tot1, tab1, by = "isocntry")
dat16$per <- dat16$Freq/dat16$tot1 * 100
dat16 <- dat16[,-(2:4)]
dat16[15,2] <- (dat16[3,2] + dat16[4,2]) / 2
dat16[16,2] <- (dat16[9,2] + dat16[10,2]) / 2
dat16[15, 1] <- 'DE'
dat16[16, 1] <- 'GB'
dat16 <- dat16[-c(3,4,9,10), ]
dat16$year <- 2016

#read in and clean 2017
eb2017 <- read.dta13('may2017eb.dta')
eb2017 <- eb2017[c('ISOCNTRY', 'QA3A_9', 'W23')]
colnames(eb2017) <- c('isocntry', 'im', 'w23')

eb2017 <- subset(eb2017, isocntry == 'AT' | isocntry == 'BE' | isocntry == 'DE-W' | isocntry == 'DE-E'| isocntry == 'DK' | isocntry == 'FI' | isocntry == 'FR' | isocntry == 'GB-GBN' | isocntry == 'GB-NIR'| isocntry == 'GR' | isocntry == 'IT' | isocntry == 'NL' | isocntry == 'SE'| isocntry == 'ES')

eb2017$im <- as.numeric(eb2017$im)
eb17w <- svydesign(ids = ~1, data = eb2017, weights = eb2017$w23)
tab1 <- svytable(~isocntry + im, design = eb17w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb17w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('AT', 'BE', 'DE-E','DE-W', 'DK','ES', 'FI', 'FR', 'GB-GBN', 'GB-NIR','GR', 'IT', 'NL', 'SE')
dat17 <- merge(tot1, tab1, by = "isocntry")
dat17$per <- dat17$Freq/dat17$tot1 * 100
dat17 <- dat17[,-(2:4)]
dat17[15,2] <- (dat17[3,2] + dat17[4,2]) / 2
dat17[16,2] <- (dat17[9,2] + dat17[10,2]) / 2
dat17[15, 1] <- 'DE'
dat17[16, 1] <- 'GB'
dat17 <- dat17[-c(3,4,9,10), ]
dat17$year <- 2017

# read in and clean 2019
eb2019<- read.dta13('july2019eb.dta')
eb2019 <- eb2019[c('isocntry', 'qa3a_9', 'w23')]
colnames(eb2019) <- c('isocntry', 'im', 'w23')

eb2019 <- subset(eb2019, isocntry == 'AT' | isocntry == 'BE' | isocntry == 'DE-W' | isocntry == 'DE-E'| isocntry == 'DK' | isocntry == 'FI' | isocntry == 'FR' | isocntry == 'GB' | isocntry == 'GR' | isocntry == 'IT' | isocntry == 'NL' | isocntry == 'SE'| isocntry == 'ES')

eb2019$im <- as.numeric(eb2019$im)
eb19w <- svydesign(ids = ~1, data = eb2019, weights = eb2019$w23)
tab1 <- svytable(~isocntry + im, design = eb19w)
tab1 <- as.data.frame(tab1)
tab1 <- subset(tab1, im == 2)

tot1 <- svytotal(~isocntry, design = eb19w)
tot1 <- as.data.frame(tot1)
tot1 <- tot1[,-2]
tot1 <- as.data.frame(tot1)
tot1$isocntry <- c('AT', 'BE', 'DE-E','DE-W', 'DK','ES', 'FI', 'FR', 'GB','GR', 'IT', 'NL', 'SE')
dat19 <- merge(tot1, tab1, by = "isocntry")
dat19$per <- dat19$Freq/dat19$tot1 * 100
dat19 <- dat19[,-(2:4)]
dat19[14,2] <- (dat19[3,2] + dat19[4,2]) / 2
dat19[14, 1] <- 'DE'
dat19 <- dat19[-c(3,4), ]
dat19$year <- 2019

#combind weighted frequency data and plot
dat <- rbind(dat14, dat15, dat16, dat17, dat19)
dat$year <- as.factor(dat$year)

ebplot <- ggplot(dat, aes(fill=year, y=per, x=isocntry)) + 
  geom_bar(position="dodge", stat="identity") + scale_fill_manual(name="Year", labels=c("2014",'2015',"2016", '2017', '2019'), values = c("grey5","grey20", "grey50", 'grey65', 'grey85')) + labs(x = "Country", y = "Percentage") +theme_bw() + scale_x_discrete(labels=c("Aus","Bel", "Ger", "Den", "Spa", "Fin", "Fra","GB", "Gre", 'Ita', 'Neth', 'Swe'))

###### Figure 2, Appendix C
#group weighted country data by year and plot
permean <- dat %>% group_by(year) %>% summarize(perc =mean(per))
permean$year <- as.factor(permean$year)

ebplot <- ggplot(permean, aes(fill=year, y=perc, x=year)) + 
  geom_bar(position="dodge", stat="identity") + scale_fill_manual(name="Year", labels=c("2014",'2015',"2016", '2017', '2019'), values = c("grey5","grey20", "grey50", 'grey65', 'grey85')) + labs(x = "Year", y = "Percentage") +theme_bw() 

######### END OF SCRIPT